###################
## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
## To be run after Mlogit_NAPPHeterov3agecohortsbeliefs.R
## "NAPP/pooleddataNAPPnew",wexernei,".RData" comes from 
## Mlogit_NAPPHeterov3agecohortsbeliefs.R and activating Robust agecohorts1530 or agecohorts1545. 
## agerange should be set as:
## 1. agerange <- 1530 If cohorts forming beliefs are between 15-30 years old
## 2. agerange <- 1545 If cohorts forming beliefs are between 15-45 years old
###################

rm(list=ls(all=TRUE))
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
#codes <- "C:/HPC/CODE/"
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
#wexernei is w50list which is our benchmark estimate
wexerneia <- "agecohorts"
agerange <- "1545" #"1530"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu

if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
sink(paste(rootsave,"NAPPEstimationHetewfenew",wexerneia,agerange,".txt",sep="")) 
options(max.print=1000000)
gc()

#######
#Load Data
# Identify those individuals age 15-30 who live in parishes of at least 30 other individuals in that cohort
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnewage",agerange,".RData",sep="" ))
HHage1530 <- unique(pooleddata$ID)
IDSocialage1530 <- unique(pooleddata$IDSocial)

load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew",wexerneia,agerange,".RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew",wexerneia,agerange,".RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew",wexerneia,agerange,".RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew",wexerneia,agerange,".RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/datalistNAPPnew",wexerneia,agerange,".RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
load(file=paste(root,"NAPP/ReStat/coordslistNAPPnew",wexerneia,agerange,".RData",sep="" )) #coordslist, Lists of coordinates per parish
load(file=paste(root,"NAPP/ReStat/dneighboursNAPPnew",wexerneia,agerange,".RData",sep="")) #dneighbours, Lists of neigbours identities per parish
load(file=paste(root,"NAPP/ReStat/wlistNAPPnew",wexerneia,agerange,".RData",sep="")) #wlist, Sparse Lists of neighbours per parish

#define which variables we include in the estimation and which formula we use
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "SEX", "stayerc"
wvar <- "sw"
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))          
fformula <- fformula1
namesmlexer <- c(paste("Mlmwnew",wexerneia,agerange,".RData",sep=""),paste("Mlmtwnew",wexerneia,agerange,".RData",sep=""))
#sum(!is.na(fformulas))

#Outer Iteration: Maximum likelihood
alter <- sort(unique(pooleddata$alt))
L <- length(alter)
it0 <- 1 
maxiter0 <- 50
zero0 <- 1E-8
Jeit <- matrix(rep(0,maxiter0*L),nrow=maxiter0) #Keep track of the coefficients
ndJeit <- L

#To keep original pooleddata to implement Weidner (2012) method
pooleddata0 <- pooleddata
pooleddataage1530 <- pooleddata[pooleddata$ID %in% HHage1530,]
datalist0 <- datalist
pooleddatalist0 <- pooleddatalist
pooleddatalistage1530 <- pooleddatalist[pooleddatalist$ID %in% HHage1530,]
pooleddata <- tolong(pooleddatalist,"choice",c("sw.")) # to convert datalist class "data.table" into "mlogit.data"
pooleddataage1530 <- tolong(pooleddatalistage1530,"choice",c("sw.")) # to convert datalist class "data.table" into "mlogit.data"

coordslist0 <- coordslist
wlist0 <- wlist
#wklist0 <- wklist

#defining weight
weigh <- wlist

#To keep original pooleddata to implement Weidner (2012) method
pooleddata00 <- pooleddata0

#to create a pooleddatalist that has columns of actual cocupational choice
class <- factor(pooleddatalist$choice) #defining factors based on class 
class.m <- model.matrix(~ -1 + class) #creating columnsdummy vars for each class category
class.mage31 <- class.m[!(pooleddatalist$ID %in% HHage1530),]
#to define outer iteration of expectations
ndexpectito <- L
ndexpectitoall <- matrix(0,nrow=maxiter0) #Keep track of the tolerance level

#for expectations we are only interested in those age 15-30
expeco <- subset(pooleddatalist[(pooleddatalist$ID %in% HHage1530),],select=paste(wvar,".",alter,sep=""))

while (it0<=maxiter0 & ndexpectito > zero0) {
  print(paste("ML iteration: ",it0, sep=""))
  
  #Estimating
  if (it0==1) {
    load(paste("Mlmtwnew",wexerneia,agerange,".RData",sep="")) 
    ml.m.w <- ml.mt.w   
	  ml.m.w0 <- ml.m.w
	}  else  ml.m.w <- mnlogit(fformula, pooleddataage1530, "alt") #for the estimation we use only sample age 15-30
  
  #Define starting conditions, if we want to give them staring values based on theta
  #if (it0==1) ml.m.w$coeff[paste(wvar,":",alter,sep="")] <- runif(L,2,4) 
  
  #Keeping coeff  
  Jeit[it0,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")] 
  print(Jeit[it0,])
  ndexpectitoall[it0,] <- ndexpectito 
  print(ndexpectitoall[it0,])
  
  #Inner iteration, fixed point in beliefs
  it <- 1 
  zero <- 1E-8
  #maxiter <- 1 #if iterating a la Aguirregabiria and Mira (2007) or a la Weidner (2012)
  maxiter <- 100 #If iterating with the fixed point a la Lee Lin Liu (2014)
  expectit <- matrix(rep(0,maxiter*L),nrow=maxiter)
  ndexpectit <- L
  
  #subsetting sample to those age 15-30
  expec <- subset(pooleddatalist[(pooleddatalist$ID %in% HHage1530),],select=paste(wvar,".",alter,sep=""))
  #iteration of beliefs
  while (it<=maxiter & ndexpectit > zero) {
    print(paste("Inner Iteration ",it,sep=""))  
    #keepint track of expectations
    expectit[it,] <- apply(expec,2,mean)
    
    
	  alpha <- .1 #Kasahara and Shimotsu (2012)
	  #Predict probabilities on the pooled data list
	  tempprob <- predict(ml.m.w,pooleddata,probability=TRUE) # we predict probabilities using the whole sample
    for(aa in sort(alter, decreasing=TRUE) ){
      ap <- which(alter==aa)
      if (aa != min(alter)) { #to guarantee KS probabilities are within [0,1] bounds
        if (AM==1) { 
          # Replace estimated probability of following occupation for age15-30
          pooleddatalist[(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- tempprob[(pooleddatalist$ID %in% HHage1530),ap] #Aguirregabiria and Mira (2007)
          # Keep actual occupation choice for age31
          pooleddatalist[!(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- class.m[!(pooleddatalist$ID %in% HHage1530),ap] #Aguirregabiria and Mira (2007)
        } else {
          # Replace estimated probability of following occupation for age15-30 using KS 2012
          pooleddatalist[(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- ((tempprob[(pooleddatalist$ID %in% HHage1530),ap])^alpha)*((ml.m.w0$probabilities[,ap])^(1-alpha)) #Kasahara and Shimotsu (2012)
          # Keep actual occupation choice for age31
          pooleddatalist[!(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- class.m[!(pooleddatalist$ID %in% HHage1530),ap] #Aguirregabiria and Mira (2007)
        }
      } else {
        if (AM==1) { 
          # Replace estimated probability of following occupation for age15-30
          pooleddatalist[(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- 1-apply(tempprob[(pooleddatalist$ID %in% HHage1530),paste(alter[alter!=min(alter)],sep="")],1,sum) 
          # Keep actual occupation choice for age31
          pooleddatalist[!(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- 1-apply(class.m[!(pooleddatalist$ID %in% HHage1530),paste("class",alter[alter!=min(alter)],sep="")],1,sum) 
        } else {
          # Replace estimated probability of following occupation for age15-30 using KS 2012
          pooleddatalist[(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- 1-apply(((tempprob[(pooleddatalist$ID %in% HHage1530),paste(alter[alter!=min(alter)],sep="")])^alpha)*((ml.m.w0$probabilities[,paste(alter[alter!=min(alter)],sep="")])^(1-alpha)),1,sum) #Kasahara and Shimotsu (2012)
          # Keep actual occupation choice for age31
          pooleddatalist[!(pooleddatalist$ID %in% HHage1530),paste(wvar,".",aa,sep="")] <- 1-apply(class.m[!(pooleddatalist$ID %in% HHage1530),paste("class",alter[alter!=min(alter)],sep="")],1,sum) 
        }
      }
    }
    
    #splitting the previous data into a list 
    datalist  <- split(pooleddatalist, list(pooleddatalist$IDSocial))
    
    #spatially weighting
    provweigh <- vector(length=L,mode="list")
    for(aa in alter){
      ap <- which(alter==aa)
      provweigh[[ap]] <- mapply(weightinga,datalist,weigh)
    }
    
    #updating pooled datalist
    pooleddatalist <- rbindlist(datalist)
    for(aa in alter){
      ap <- which(alter==aa)
      pooleddatalist[[paste(wvar,".",aa,sep="")]] <- unlist(provweigh[[ap]])
    }
    rm(provweigh)
    attr(pooleddatalist,"class") <- "data.frame"
    #updating expectations taking only age15-30
    expec1 <- subset(pooleddatalist[(pooleddatalist$ID %in% HHage1530),],select=paste(wvar,".",alter,sep=""))
    
    #Updating pooleddata (long format)
    for(aa in alter){
      pooleddata[pooleddata$alt==aa,][[paste(wvar,sep="")]] <- pooleddatalist[[paste(wvar,".",aa,sep="")]] 
    }
    
    #updating pooleddata for those age15-30
    pooleddataage1530 <- pooleddata[pooleddata$ID %in% HHage1530,]
    
    if (it>=2) ndexpectit <- max(as.matrix(abs(expec1-expec))) # the maximum of all matrix entries ndexpectit <- sum(distr(expec,expec1)) #element by element
    print(paste("Max abs diff inner: ",ndexpectit,sep=""))
    expec <- expec1
    it <- it+1
  }
  if (it<=maxiter & ndexpectit < zero) print(paste("Fixed point in beliefs convergence reached at iter: ",it-1,sep="")) else print(paste("Fixed point in beliefs one step update it: ",it,sep="")) #print(paste("Fixed point in beliefs FAILED to converge after it: ",it,sep=""))
  
  #updating expectations
  expec1o <- subset(pooleddatalist[(pooleddatalist$ID %in% HHage1530),],select=paste(wvar,".",alter,sep=""))
  
  if (it0>=2) ndexpectito <- max(as.matrix(abs(expec1o-expeco))) # the maximum of all matrix entries ndJeit <- sum(abs(Jeit[it0,]-Jeit[it0-1,]))
  print(paste("Diff Abs max ",ndexpectito,sep=""))
  expeco <- expec1o
  
  #To keep the previous probabilities to perform Kasahara and Shimotsu
  ml.m.w0 <- ml.m.w
  it0 <- it0+1
}

print(paste("Outer ML Heterogenous iter converged at iter: ",it0-1," using weights: ",wvar,sep=""))
Jeit[it0-1,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
print(Jeit[it0-1,])
ndexpectitoall[it0-1,] <- ndexpectito 
print(ndexpectitoall[it0-1,])

#Saving last iteration results
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnew",wexerneia,agerange,".RData",sep=""))
print(summary(ml.m.w))

#Plot convergence in coefficientes 
#coefficients
colnames(Jeit)<-c(paste("J.",alter,sep=""))
dataJeit<-as.data.frame(Jeit[c(1:(it0-1)),])
dataJeit$iter <- c(1:nrow(dataJeit))
meltdataJeit<-melt(dataJeit,id="iter")
cairo_ps(paste(rootsave,"JePMLmtwfinalnew",wexerneia,agerange,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
print(ggplot(meltdataJeit,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(paste(J[y]^{t})))+xlab("t"))
dev.off()
save(dataJeit,file=paste(rootsave,"JePMLmtwfinalnew",wexerneia,agerange,".RData",sep=""))

#Plot convergence in Infinity Norm
#coefficients
colnames(ndexpectitoall)<-"AbsMax"
dataInftyNorm<-as.data.frame(ndexpectitoall[c(3:(it0-1))])
colnames(dataInftyNorm)<-"AbsMax"
dataInftyNorm$iter <- c(1:nrow(dataInftyNorm))
meltdataInftyNorm<-melt(dataInftyNorm,id="iter")
cairo_ps(paste(rootsave,"InftyNormMLmtwfinalnew",wexerneia,agerange,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
print(ggplot(meltdataInftyNorm,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(InftyNorm))+xlab("t"))
dev.off()
save(dataInftyNorm,file=paste(rootsave,"InftyNormMLmtwfinalnew",wexerneia,agerange,".RData",sep=""))

sink()
